# Figure_A08.R

# Part of the replication archive for 
#
#   Bullock, John G. 2020. "Education and Attitudes toward Redistribution in
#   the United States." British Journal of Political Science 50.


# This file produces Figure A8 in the appendix to the article: "Estimated 
# effects of education with different sets of instruments."


library(Bullock, lib.loc = c(.libPaths(), 'packageLibrary'))  # for qw()
library(car)      # for Recode()
library(grid)
library(ivpack)   # for cluster.robust.se()
library(lattice)
library(Hmisc)    # for Dotplot()

source('IV_setup.R')
source('functions/estimateModels.R')
source('functions/estTable.R')
source('float_code/dotplotParameters.R')

dirOutput    <- 'float_output/'
filenameStem <- 'Figure_A08' 
PDF_title    <- 'Figure A8: Baseline estimates and first-stage F statistics with different instrument sets' 
varNames     <- qw('eqwlth goveqinc guarantee.7pt govt.health.7pt helppoor welfare')
dfNames      <- qw('GSS.df GSS.df   ANES.df       ANES.df         GSS.df   GSS.df') 



##############################################################################
# SET UP MODELS
##############################################################################
rm(
  list  = ls(pat = '[2-9]$', envir = IVModelsEnv), 
  envir = IVModelsEnv)                               # remove all but baseline model
IVModelsEnv$eqwlth.mod10 <- IVModelsEnv$eqwlth.mod1  # my standard instrument set
IVModelsEnv$eqwlth.mod1  <- update(IVModelsEnv$eqwlth.mod1, . ~ . | . - CA.fac  + CA.fac4)
IVModelsEnv$eqwlth.mod2  <- update(IVModelsEnv$eqwlth.mod1, . ~ . | . - CA.fac4 + CL.fac4)
IVModelsEnv$eqwlth.mod3  <- update(IVModelsEnv$eqwlth.mod1, . ~ . | .           + CL.fac4)
IVModelsEnv$eqwlth.mod4  <- update(IVModelsEnv$eqwlth.mod1, . ~ . | .           + CL.fac4 + CA.fac4:CL.fac4)
IVModelsEnv$eqwlth.mod5  <- update(IVModelsEnv$eqwlth.mod1, . ~ . | . - CA.fac4 + CL.facGeq9)
IVModelsEnv$eqwlth.mod6  <- update(IVModelsEnv$eqwlth.mod1, . ~ . | . - CA.fac4 + CL)
IVModelsEnv$eqwlth.mod7  <- update(IVModelsEnv$eqwlth.mod1, . ~ . | . - CA.fac4 + leaving_age16)
IVModelsEnv$eqwlth.mod9  <- update(IVModelsEnv$eqwlth.mod1, . ~ . | . - CA.fac4 + CA.facGeq10 + CL.facGeq8 + (CA.facGeq10 + CL.facGeq8):yearYoung)

# ADD MODNUM ATTRIBUTE
attributes(IVModelsEnv$eqwlth.mod1)$modNum  <-  1    
attributes(IVModelsEnv$eqwlth.mod2)$modNum  <-  2    
attributes(IVModelsEnv$eqwlth.mod3)$modNum  <-  3    
attributes(IVModelsEnv$eqwlth.mod4)$modNum  <-  4    
attributes(IVModelsEnv$eqwlth.mod5)$modNum  <-  5    
attributes(IVModelsEnv$eqwlth.mod6)$modNum  <-  6    
attributes(IVModelsEnv$eqwlth.mod7)$modNum  <-  7    
attributes(IVModelsEnv$eqwlth.mod9)$modNum  <-  9    
attributes(IVModelsEnv$eqwlth.mod10)$modNum <- 10    



##############################################################################
# PRELIMINARIES FOR DRAWING THE FIGURE
##############################################################################
panelWidth       <- list(1.24, "inches")               
panelHeight      <- list(1.70, "inches")
stripHeight      <- list(lines = .95, lineheight = 14) # lineheight is space between lines
panelBorderWidth <- .3
panelLayout      <- c(6, 2)  # columns, then rows
nPanels          <- panelLayout[1] * panelLayout[2] 
xBetween         <- 0.8000   # space between columns
yBetween         <- 3.2500   # space between rows
baseCexSize      <- 1.00     # cex
stripTextSize    <-  .700 * baseCexSize 
xLabSize         <- 1.000 * baseCexSize 
yLabSize         <- 1.000 * baseCexSize 
xAxisTextSize    <-  .670 * baseCexSize 
yAxisTextSize    <-  .710 * baseCexSize
axisTickSize     <-  .4
xlistLimits <- split(
  x = c(
    rep(c(-.225, .225), 6),
    rep(c(-2.5, 102.5), 6)), 
  f = rep(1:nPanels, each = 2))
xlistLabels <- split(
  x = c(rep(qw("-.2 -.1 0 .1 .2"), 6),   
        rep(qw("10 35 60 85"),   6)), 
  f = c(rep(1:6,  each = 5), 
        rep(7:12, each = 4)))
xlistAt <- lapply(xlistLabels, as.numeric)
xlist <- list(
  draw        = TRUE,         # if ybetween is too small, will only draw for bottom panels 
  limits      = xlistLimits,
  labels      = xlistLabels,
  at          = xlistAt,  
  tck         = c(axisTickSize, 0), 
  col         = panelBorderCol, 
  cex         = xAxisTextSize,
  alternating = 1, 
  relation    = 'free', 
  axs         = 'i')  # axs='i' means that there is no padding around the xlimits
ylist <- list(
  draw        = TRUE,
  limits      = c(.5, 9.6),
  labels      = c("AA1 and LM", "AA2 and OPS", "AA3 and MMO", "AA4", "Dee", "L\U00ADM and GL\U00ADM", "O1", "prev. version", "current version"),
  at          = 9:1,
  tck         = 0,
  col         = panelBorderCol,
  cex         = yAxisTextSize,
  alternating = c(1, 1),
  relation    = 'same',  
  axs         = 'i')



##############################################################################
# ESTIMATE MODELS AND GET F STATISTICS
##############################################################################
IVModelObjectsEnv <- estimateModels(
  varNames     = varNames, 
  modelEnvir   = IVModelsEnv, 
  dfNames      = dfNames,
  objectSuffix = '.IV') 

Fstats <- sapply(
  IVModelObjectsEnv, 
  function (x) { 
    summary(x, diagnostics = TRUE)$diagnostics['Weak instruments', 'statistic'] 
  } 
)



##############################################################################
# CREATE DATA FRAME WITH DATA TO PLOT
##############################################################################
dataToPlot <- expand.grid(
  lower     = NA, 
  pred      = NA, 
  upper     = NA,
  statistic = c('2SLS', 'F'),
  model     = sort(as.integer(eapply(IVModelsEnv, function (x) attributes(x)$modNum))),
  depVar    = varNames)

for (varName in varNames) {
  for (modNum in unique(dataToPlot$model)) {

    # Record 2SLS estimates and CIs
    tmpEduc         <- get(paste0(varName, '.IV', modNum), IVModelObjectsEnv)
    tmpEducClusters <- droplevels(tmpEduc$model$stateYoung:factor(tmpEduc$model$yearYoung))
    coefEstEduc     <- coeftest(tmpEduc)['educ', 'Estimate']
    coefSEEduc      <- cluster.robust.se(tmpEduc, tmpEducClusters)['educ',     'Std. Error']
    dataToPlot[dataToPlot$statistic == '2SLS' & dataToPlot$model == modNum & dataToPlot$depVar == varName, c("lower", "pred", "upper")] <- c(
      coefEstEduc - 1.96*coefSEEduc,
      coefEstEduc,
      coefEstEduc + 1.96*coefSEEduc)    
      
    # Record F statistics for exclusion of instruments in first stage
    dataToPlot[dataToPlot$statistic == 'F' & dataToPlot$model == modNum & dataToPlot$depVar == varName, "pred"] <- Fstats[paste0(varName, '.IV', modNum)]
  } 
}  


# Order the panels according to the order of varNames, which is the order that
# I use for the tables in the paper.
for (i in 1:length(varNames)) {
  dataToPlot$panelNum[grepl(varNames[i], dataToPlot$depVar)] <- i
}

# ADD ROW NUMBERS
# Add row numbers that order the rows by average effect size.  
tmp      <- subset(dataToPlot, statistic == '2SLS', qw("pred model depVar"))
tmpSizes <- tapply(tmp$pred, tmp$model, mean)
tmpSizes <- sort(tmpSizes)
dataToPlot$rowNum <- NA
for (modNum in dataToPlot$model) {
  dataToPlot$rowNum[dataToPlot$model == modNum] <- which(names(tmpSizes) == modNum)
}

# Order the row names by average effect size.
rownameOrder <- Recode(names(tmpSizes), '9=8; 10=9')
rownameOrder <- rev(rownameOrder)
ylist$labels <- ylist$labels[rownameOrder]



##############################################################################
# FIND MINIMUM AND MAXIMUM SAMPLE SIZES
##############################################################################
nMin <- apply(
  X      = getNobsForMatrixOfModels(varNames, objectEnvir = IVModelObjectsEnv), 
  MARGIN = 2, 
  FUN    = min)
nMax <- apply(
  X      = getNobsForMatrixOfModels(varNames, objectEnvir = IVModelObjectsEnv), 
  MARGIN = 2, 
  FUN    = max)
nMin
nMax



##############################################################################
# STRIP FUNCTION
##############################################################################
horizStrip <- function(...) {
  stripText       <- gsub('\\n\\(.*', '', stripText)
  stripText       <- gsub('std\\.', 'standard', stripText)
  stripTextDouble <- c(stripText, stripText)
  lpolygon(
    x        = c(0,1,1,0,0), 
    y        = c(0,0,1,1,0), 
    col      = stripBackgroundCol, 
    border   = stripBorderCol,
    linejoin = 'mitre',  # for square corners
    lwd      = panelBorderWidth)
  ltext(
    x          = .5, 
    y          = .5, 
    labels     = stripTextDouble[panel.number()], 
    font       = 2, 
    cex        = stripTextSize, 
    lineheight = 1)  # .4 for one-line labels         
}  



##############################################################################
# PANEL FUNCTION
##############################################################################
myPanel <- function(...) {
  #trellis.par.set("clip", list(panel = "on", strip = "on")) 
  if (panel.number() <= 6) {
    panel.abline(v = 0,  col = dotplotLineColor)
  } else {
    panel.abline(v = 10, col = dotplotLineColor)    
  }
  
  panel.Dotplot(...)
  
  # Put tick marks at top of each panel
  panel.axis(
    side        = "top",
    at          = if (panel.number() <= 6) xlist$at[[1]] else xlist$at[[7]],
    draw.labels = FALSE,
    half        = FALSE,
    tck         = axisTickSize * .75)
  
  # Put panel number in each panel
  # grid.text(panel.number(), .1, .2)
  
  #trellis.par.set("clip", list(panel = "off", strip = "off")) # for permitting panel.axis(outside=T, ...)
  #trellis.par.set("axis.text", list(cex=.65)) # set size of axis text
  # panel.axis(side="bottom", outside = TRUE, at=2:5, label=c('2','3','4','5'), tck=.5, rot=c(0, 0))
}



##############################################################################
# MAKE THE PDF FILE
##############################################################################
pdf(
  file   = paste0(dirOutput, filenameStem, '.pdf'), 
  width  = PS_width, 
  height = PS_height, 
  paper  = "special", 
  title  = PDF_title,
  bg     = postscriptBackground)
trellis.par.set(
  axis.components = list(
    left = list(pad1 = .5)),   # distance between axis ticks and tick labels
  axis.line = list(
    alpha = 1, 
    col   = panelBorderCol,    
    lty   = 1, 
    lwd   = panelBorderWidth),  
  dot.line = list(alpha=1, col=dotplotLineColor, lty=1, lwd=1),
  dot.symbol = list(
    alpha = 1, 
    cex   = .5, 
    col   = dotColor, 
    font  = 1, 
    pch   = 16),
  plot.line = list(
    alpha = 1, col = dotColor, lty = 1, lwd = 1),
  superpose.line = list(
    alpha = c(1,1), 
    col   = c('black', plotLineColor), 
    lty   = c("solid", "43"), 
    lwd   = c(1, plotLineWidth))
)
mainResultsDotplot <- Dotplot(
  rowNum ~ Cbind(pred, lower, upper) | panelNum * statistic, 
  data           = dataToPlot,
  panel          = myPanel,
  layout         = panelLayout,
  as.table       = TRUE,
  between        = list(x = xBetween, y = yBetween),                   
  strip          = horizStrip,
  par.strip.text = stripHeight,
  scales         = list(x = xlist, y = ylist),
  xlab           = '',
  ylab           = '',
  par.settings   = list(clip = list(panel="on", strip="on") ))
print(
  mainResultsDotplot, 
  panel.width  = panelWidth, 
  panel.height = panelHeight)

# ADD LABELS BELOW EACH ROW
label2SLS <- 'Marginal effects of a year of education on attitudes (2SLS estimates)'
labelF    <- expression(paste("First\U00ADstage ", italic(F), " statistics"))
downViewport("plot_01.figure.vp")
grid.text(
  label = c(label2SLS, labelF),
  y     = c(.535, -.04),
  gp    = gpar(cex = xLabSize)) 


# SAVE AND PROCESS FILE
dev.off()
if (!(Sys.which('pdfcrop')=='' | Sys.which('perl')=='')  |  'pdfcrop' %in% installed.packages()[, 'Package']) {  # if "pdfcrop" is installed 
  system(paste(
    if (Sys.info()['sysname']=='Windows') paste(Sys.getenv('Comspec'), '/c ') else '',
    'pdfcrop', 
    paste0(dirOutput, filenameStem, '.pdf'), 
    paste0(dirOutput, filenameStem, '_crop.pdf')))
  file.remove(paste0(dirOutput, filenameStem, '.pdf'))
  if (interactive() & Sys.info()['sysname']=='Windows') shell.exec(paste0('c:/school/', dirOutput, filenameStem, '_crop.pdf'))
}


##############################################################################
# RESULTS REPORTED IN THE PROSE OF THE APPENDIX
##############################################################################
if (interactive()) {
  dataToPlot %>% filter(depVar == 'eqwlth', statistic == '2SLS') 
  dataToPlot %>% filter(depVar == 'guarantee.7pt', statistic == '2SLS')
}
